import math
import pdb
from random import sample

import numpy as np
from numpy import linalg as LA
from scipy.linalg import sqrtm, cholesky, cho_solve
from scipy.optimize import minimize
from scipy.stats import multivariate_normal
from sklearn.gaussian_process import GaussianProcessClassifier, GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel, DotProduct, WhiteKernel
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC

def DilutedES(X, Y):
	"""Return the pooled-standard-deviation effect size of the last column of X.

	Rows where Y == 0 form the control group and rows where Y == 1 the treated
	group; the result is (mean(treated) - mean(control)) / pooled std.
	"""
	Control = X[Y == 0, -1]
	Treated = X[Y == 1, -1]
	NumControl, NumTreated = len(Control), len(Treated)

	# np.var is the population variance (ddof=0), weighted by (group size - 1).
	WeightedVar = (NumTreated - 1) * np.var(Treated) + (NumControl - 1) * np.var(Control)
	PooledStd = np.sqrt(WeightedVar / (NumControl + NumTreated - 2))
	return (np.mean(Treated) - np.mean(Control)) / PooledStd


class GPcls():
	"""Binary Gaussian-process classifier with a logistic likelihood and a prior mean.

	fit() searches for the mode of the latent posterior with Newton iterations
	(the scheme of Rasmussen & Williams, "Gaussian Processes for Machine
	Learning", Algorithm 3.1, extended with a non-zero prior mean) and keeps
	the iterate with the smallest negative log posterior.
	"""

	def __init__(self):
		self.X = None  # training inputs given to fit()
		self.Y = None  # training labels as a column vector recoded to {-1, +1}
		self.mu = None  # prior mean at the training inputs, column vector
		self.kernel = 1.0 * RBF(1e-5)
		self.Sigma = None  # kernel matrix of the training inputs
		self.f = None  # best latent vector found by fit()
		self.SigmaInv = None  # inverse of self.Sigma
		self.NegLogLoss = np.inf  # objective value attained by self.f

	def Posterior(self, f):
		"""Return the unnormalised negative log posterior of the latent vector f.

		The value is sum(log(1 + exp(-y * f))) + 0.5 * (f - mu)^T K^{-1} (f - mu)
		with y in {-1, +1}; fit() uses it to rank Newton iterates.
		"""
		Resid = f - self.mu
		# logaddexp(0, -y f) == log(1 + exp(-y f)) without overflow.
		NegLogLik = np.sum(np.logaddexp(0.0, -self.Y * f))
		Prior = 0.5 * np.matmul(np.matmul(Resid.T, self.SigmaInv), Resid).item()
		return NegLogLik + Prior

	def _KeepIfBetter(self, f):
		"""Store f as the current best latent vector if it lowers the objective."""
		Loss = self.Posterior(f)
		if self.NegLogLoss - Loss > 0:
			self.f = f
			self.NegLogLoss = Loss

	def fit(self, X, Y, Mu, Iter=100):
		"""Run Iter Newton steps from a random start and keep the best latent vector.

		X: training inputs accepted by self.kernel.
		Y: labels in {0, 1}; reshaped to a column and recoded to {-1, +1}.
		Mu: prior mean at X; reshaped to a column.
		Iter: number of Newton steps.
		"""
		self.NegLogLoss = np.inf
		self.X = X
		self.Y = 2 * Y.reshape((-1, 1)) - 1
		self.mu = Mu.reshape((-1, 1))
		self.Sigma = self.kernel(X)
		self.SigmaInv = LA.inv(self.Sigma)

		Target = (self.Y + 1) / 2  # labels back in {0, 1}
		Eye = np.eye(len(self.Y))
		f = np.random.normal(size=len(self.Y)).reshape((-1, 1))
		for _ in range(Iter):
			self._KeepIfBetter(f)

			Pi = 1 / (1 + np.exp(-f))
			W = np.diag((Pi * (1 - Pi)).reshape(-1))  # negative Hessian of the log likelihood
			W_sr = np.sqrt(W)  # W is diagonal, so the element-wise root is W^{1/2}
			# Matrix products are required here; an element-wise `*` would keep
			# only the diagonal of K and break the Newton update below.
			W_sr_K = np.matmul(W_sr, self.Sigma)
			K_W_sr = np.matmul(self.Sigma, W_sr)
			B = Eye + np.matmul(W_sr_K, W_sr)  # I + W^{1/2} K W^{1/2}
			Chol = cholesky(B, lower=True)
			b = np.matmul(W, f) + Target - Pi
			x0 = cho_solve((Chol, True), W_sr)  # B^{-1} W^{1/2}
			x1 = cho_solve((Chol, True), np.matmul(W_sr_K, b))  # B^{-1} W^{1/2} K b
			c0 = Eye - np.matmul(K_W_sr, x0)  # I - K W^{1/2} B^{-1} W^{1/2}

			f = np.matmul(c0, self.mu) + np.matmul(self.Sigma, b) - np.matmul(K_W_sr, x1)

		# Score the final iterate as well, otherwise the last step is wasted.
		self._KeepIfBetter(f)

	def predict_proba(self, x, mu):
		"""Return an (len(x), 2) array of class probabilities [P(0), P(1)].

		x: query inputs; mu: prior mean at the query inputs.
		"""
		SigmaX = self.kernel(x, self.X)
		mu = mu.reshape((-1, 1))
		Grad = (self.Y + 1) / 2 - 1 / (1 + np.exp(-self.f))
		fx0 = np.matmul(SigmaX, Grad)
		# NOTE(review): at a converged mode K^{-1}(f - mu) already equals Grad, so
		# subtracting fx1 may double-count the prior mean - confirm before changing.
		fx1 = np.matmul(np.matmul(SigmaX, self.SigmaInv), self.mu)
		fx = mu + fx0 - fx1

		P = np.zeros((len(x), 2))
		P[:, 0] = (1 - 1 / (1 + np.exp(-fx))).reshape(-1)
		P[:, 1] = 1 - P[:, 0]
		return P

	def predict(self, x, mu):
		"""Return 0/1 labels (as floats): 1 where the class-1 probability is >= 0.5."""
		P = self.predict_proba(x, mu)
		return (P[:, -1] >= 0.5).astype(float)
		


def SetPrior(args, Feat):
	"""Build a prior-mean vector for synthetic data from a noisy posterior ranking.

	Two Gaussians (class 0 and class 1) are defined from args.Preprocess,
	args.Feat (dimension) and args.Delta. Samples in Feat are ranked by the
	class-1 posterior, a fraction of the ranking is perturbed by random swaps,
	and the prior mean decreases linearly from 0.2 (top rank) to -0.2.

	Returns an array with one prior mean per row of Feat.
	Raises ValueError for an unsupported args.DataType or args.Preprocess.
	"""
	if args.DataType != 'Syn':
		raise ValueError("SetPrior only supports DataType 'Syn', got %r" % (args.DataType,))

	Dim = args.Feat
	Mean0 = np.zeros(Dim); Mean1 = np.zeros(Dim)
	Cov0 = np.eye(Dim)
	if args.Preprocess == 'Observation2':
		# Classes differ by a mean shift of Delta along the first feature.
		Mean0[0] -= args.Delta / 2
		Mean1[0] += args.Delta / 2
		Cov1 = np.eye(Dim)
	elif args.Preprocess == 'ObservationVar2':
		# Classes share the mean and differ in variance.
		Cov1 = np.diag(np.ones(Dim) + args.Delta)
	else:
		raise ValueError("SetPrior does not support Preprocess %r" % (args.Preprocess,))

	# Construct true rank (pdf() returns a scalar for a single sample, hence atleast_1d).
	pdf0 = multivariate_normal.pdf(Feat, mean=Mean0, cov=Cov0)
	pdf1 = multivariate_normal.pdf(Feat, mean=Mean1, cov=Cov1)
	Posterior1 = np.atleast_1d(pdf1 / (pdf0 + pdf1))
	Prob1Rank = np.argsort(-Posterior1)

	# Add noise to rank. Prob1Rank is a permutation of 0..n-1, so its sampled
	# values are themselves valid positions inside Prob1Rank.
	NoiseLevel = 0.1
	Ind = sample(list(Prob1Rank), int(len(Prob1Rank) * NoiseLevel))
	for i in range(len(Ind) // 2):
		# Pair the i-th sampled position with the i-th from the end; using
		# Ind[-i] would pair position 0 with itself and skip the last one.
		a, b = Ind[i], Ind[-(i + 1)]
		Prob1Rank[[a, b]] = Prob1Rank[[b, a]]

	# Linearly decreasing prior mean over the (noisy) ranking; linspace also
	# covers the single-sample case without dividing by zero.
	mu = np.zeros(len(Prob1Rank))
	mu[Prob1Rank] = np.linspace(0.2, -0.2, len(Prob1Rank))
	return mu


class CausGP():
	"""Two-arm Gaussian-process outcome model plus an assignment classifier.

	Every X handled by this class stores the covariates in all but the last
	column and the response in the last column; Y is the 0/1 assignment.
	Cls0 / Cls1 regress the response on the covariates for Y == 0 / Y == 1 and
	Cls is a classifier selected by args.cls. The args object must provide
	cls and Trial (plus gr for the knn classifier, Thres and GroupSize for the
	cohort methods).
	"""

	# Sentinel default: the RBF(5) default kernel is built per instance, while
	# an explicitly passed kernel (including None) is stored untouched.
	_UnsetKernel = object()

	def __init__(self, args, kernel=_UnsetKernel):
		self.kernel = RBF(5) if kernel is self._UnsetKernel else kernel
		self.Cls0 = None  # GP regressor fitted on rows with Y == 0
		self.Cls1 = None  # GP regressor fitted on rows with Y == 1
		self.Cls = None  # classifier chosen by args.cls
		self.args = args

	def _MakeClassifier(self, X):
		"""Create the unfitted classifier named by args.cls; ValueError if unknown."""
		Name = self.args.cls
		if Name in ('GPCausallogistic', 'GPCausallogistic2'):
			return LogisticRegression(random_state=self.args.Trial, solver='lbfgs')
		if Name == 'GPCausalSVC':
			return SVC(gamma='auto', kernel='rbf', random_state=self.args.Trial, probability=True)
		if Name == 'GPCausalknn':
			return KNeighborsClassifier(algorithm='auto', n_neighbors=math.ceil(len(X) ** (self.args.gr)))
		if Name == 'GPCausalNN':
			return MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(X.shape[1], 4), random_state=1)
		if Name == 'GPCausalGPCls':
			# The kernel argument must be a kernel object, not the string 'rbf'.
			return GaussianProcessClassifier(kernel=1.0 * RBF(1.0), random_state=self.args.Trial)
		raise ValueError("Unknown classifier name in args.cls: %r" % (Name,))

	def _ClassifierInput(self, X):
		"""Return the feature matrix the classifier sees (response only for 'GPCausallogistic2')."""
		if self.args.cls == 'GPCausallogistic2':
			return X[:, -1].reshape((-1, 1))
		return X

	def _PredictArms(self, X_Cov):
		"""Return (mean0, std0, mean1, std1) of both regressors as column vectors."""
		Mean0, Std0 = self.Cls0.predict(X_Cov, return_std=True)
		Mean1, Std1 = self.Cls1.predict(X_Cov, return_std=True)
		# Reshape everything: depending on the regressor the mean may come back
		# 1-D or (n, 1), and mixing the two would broadcast to (n, n).
		return tuple(np.reshape(V, (-1, 1)) for V in (Mean0, Std0, Mean1, Std1))

	def _PointwiseEffectSize(self, X_Cov):
		"""Return the (n, 1) predicted mean difference divided by the pooled predictive std."""
		Mean0, Std0, Mean1, Std1 = self._PredictArms(X_Cov)
		return (Mean1 - Mean0) / np.sqrt((Std1 ** 2 + Std0 ** 2) / 2)

	def _CohortIndices(self, X):
		"""Return indices of rows whose point-wise effect size reaches args.Thres."""
		Mask = (self._PointwiseEffectSize(X[:, :-1]) >= self.args.Thres).reshape(-1)
		return np.where(Mask == 1)[0]

	@staticmethod
	def _PooledEffectSize(Res, Y):
		"""Return (mean(treated) - mean(control)) / pooled std for responses Res."""
		CtRes = Res[Y == 0]; TrRes = Res[Y == 1]
		CtN = len(CtRes); TrN = len(TrRes)
		STD = np.sqrt(((TrN - 1) * np.var(TrRes) + (CtN - 1) * np.var(CtRes)) / (CtN + TrN - 2))
		return (np.mean(TrRes) - np.mean(CtRes)) / STD

	def fit(self, X, Y, FullModelUpdate=1):
		"""Fit the classifier and, when FullModelUpdate == 1, both GP regressors.

		X: covariates with the response in the last column; Y: 0/1 assignment.
		Raises ValueError if args.cls names no known classifier.
		"""
		Y = Y.reshape(-1)
		# Build the classifier first so a bad name fails before the GP fits.
		Classifier = self._MakeClassifier(X)

		if FullModelUpdate == 1:
			X_Cov = X[:, :-1]; Y_R = X[:, -1]
			self.Cls0 = GaussianProcessRegressor(kernel=self.kernel, alpha=0.001).fit(
				X_Cov[Y == 0], Y_R[Y == 0].reshape((-1, 1)))
			self.Cls1 = GaussianProcessRegressor(kernel=self.kernel, alpha=0.001).fit(
				X_Cov[Y == 1], Y_R[Y == 1].reshape((-1, 1)))

		self.Cls = Classifier
		self.Cls.fit(self._ClassifierInput(X), Y)

	def predict_EffectSize(self, X):
		"""Return the (n, 1) point-wise effect size predicted by the two regressors."""
		return self._PointwiseEffectSize(X[:, :-1])

	def AcquisitionFunc(self, X, Beta=1):
		"""Return the predicted mean difference Cls1 - Cls0 (Beta is currently unused)."""
		X_Cov = X[:, :-1]
		return self.Cls1.predict(X_Cov) - self.Cls0.predict(X_Cov)

	def predict_proba(self, X):
		"""Return the class probabilities of the fitted classifier Cls."""
		return self.Cls.predict_proba(self._ClassifierInput(X))

	def predict_proba2(self, X):
		"""Return (n, 2) arm probabilities from the Gaussian predictive densities of the response."""
		Y_R = X[:, -1].reshape((-1, 1))
		Mean0, Std0, Mean1, Std1 = self._PredictArms(X[:, :-1])

		RegressionProb0 = np.exp(-0.5 * ((Y_R - Mean0) / Std0) ** 2) / (Std0 * np.sqrt(2 * np.pi))
		RegressionProb1 = np.exp(-0.5 * ((Y_R - Mean1) / Std1) ** 2) / (Std1 * np.sqrt(2 * np.pi))
		Total = RegressionProb0 + RegressionProb1

		Prob = np.zeros((len(X), 2))
		Prob[:, 0] = (RegressionProb0 / Total).reshape(-1)
		Prob[:, 1] = (RegressionProb1 / Total).reshape(-1)
		return Prob

	def predict_CohortEffectSize(self, X, Y):
		"""Return the observed pooled effect size of X's response, or nan if Y has one group."""
		if len(np.unique(Y)) > 1:
			return self._PooledEffectSize(X[:, -1], Y)
		return np.nan

	def predict_CohortEffectSize2(self, X, Y):
		"""Return the observed effect size of a random sub-cohort selected by the model.

		Rows whose predicted effect size reaches args.Thres are candidates; at
		most args.GroupSize of them are drawn without replacement. Returns nan
		when the drawn cohort does not contain both groups.
		"""
		Ind = self._CohortIndices(X)
		if len(Ind) > 0:
			Ind2 = Ind[np.random.choice(len(Ind), min(self.args.GroupSize, len(Ind)), replace=False)]
		else:
			Ind2 = Ind  # nothing selected; avoid sampling from an empty population
		print("The number of selected data: %d"%len(Ind2))

		Y_Cohort = Y[Ind2]; Res_Cohort = X[Ind2, -1]
		if len(np.unique(Y_Cohort)) > 1:
			return self._PooledEffectSize(Res_Cohort, Y_Cohort)
		return np.nan

	def predict_PredictionIndex(self, X, Y):
		"""Return indices of rows whose predicted effect size reaches args.Thres (Y is unused)."""
		return self._CohortIndices(X)

class ScaledCausGP():
	"""Standardising variant of the two-arm Gaussian-process outcome model.

	Every X handled by this class stores the covariates in all but the last
	column and the response in the last column; Y is the 0/1 assignment.
	fit() records the column means and standard deviations of X and every
	method standardises its X with them before use. Cls0 / Cls1 regress the
	response on the covariates for Y == 0 / Y == 1 and Cls is a classifier
	selected by args.cls. The args object must provide cls and Trial (plus gr
	for the knn classifier, Thres and GroupSize for the cohort methods).
	"""

	# Sentinel default: the RBF(5) default kernel is built per instance, while
	# an explicitly passed kernel (including None) is stored untouched.
	_UnsetKernel = object()

	def __init__(self, args, kernel=_UnsetKernel):
		self.kernel = RBF(5) if kernel is self._UnsetKernel else kernel
		self.Cls0 = None  # GP regressor fitted on rows with Y == 0
		self.Cls1 = None  # GP regressor fitted on rows with Y == 1
		self.Cls = None  # classifier chosen by args.cls
		self.X_Mean = None  # column means recorded by fit()
		self.X_Std = None  # column standard deviations recorded by fit()
		self.args = args

	def _Standardize(self, X):
		"""Return X centred and scaled with the statistics recorded by fit()."""
		return (X - self.X_Mean) / self.X_Std

	def _MakeClassifier(self, X):
		"""Create the unfitted classifier named by args.cls; ValueError if unknown."""
		Name = self.args.cls
		if Name in ('GPCausallogistic', 'GPCausallogistic2', 'ScaledGPCausallogistic'):
			return LogisticRegression(random_state=self.args.Trial, solver='lbfgs')
		if Name in ('GPCausalSVC', 'ScaledGPCausalSVC'):
			return SVC(gamma='auto', kernel='rbf', random_state=self.args.Trial, probability=True)
		if Name == 'GPCausalknn':
			return KNeighborsClassifier(algorithm='auto', n_neighbors=math.ceil(len(X) ** (self.args.gr)))
		if Name == 'GPCausalNN':
			return MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(X.shape[1], 4), random_state=1)
		if Name == 'GPCausalGPCls':
			# The kernel argument must be a kernel object, not the string 'rbf'.
			return GaussianProcessClassifier(kernel=1.0 * RBF(1.0), random_state=self.args.Trial)
		raise ValueError("Unknown classifier name in args.cls: %r" % (Name,))

	def _ClassifierInput(self, X):
		"""Return the feature matrix the classifier sees (response only for 'GPCausallogistic2')."""
		if self.args.cls == 'GPCausallogistic2':
			return X[:, -1].reshape((-1, 1))
		return X

	def _PredictArms(self, X_Cov):
		"""Return (mean0, std0, mean1, std1) of both regressors as column vectors."""
		Mean0, Std0 = self.Cls0.predict(X_Cov, return_std=True)
		Mean1, Std1 = self.Cls1.predict(X_Cov, return_std=True)
		# Reshape everything: depending on the regressor the mean may come back
		# 1-D or (n, 1), and mixing the two would broadcast to (n, n).
		return tuple(np.reshape(V, (-1, 1)) for V in (Mean0, Std0, Mean1, Std1))

	def _PointwiseEffectSize(self, X_Cov):
		"""Return the (n, 1) predicted mean difference divided by the pooled predictive std."""
		Mean0, Std0, Mean1, Std1 = self._PredictArms(X_Cov)
		return (Mean1 - Mean0) / np.sqrt((Std1 ** 2 + Std0 ** 2) / 2)

	def _CohortIndices(self, X):
		"""Return indices of (already standardised) rows whose effect size reaches args.Thres."""
		Mask = (self._PointwiseEffectSize(X[:, :-1]) >= self.args.Thres).reshape(-1)
		return np.where(Mask == 1)[0]

	@staticmethod
	def _PooledEffectSize(Res, Y):
		"""Return (mean(treated) - mean(control)) / pooled std for responses Res."""
		CtRes = Res[Y == 0]; TrRes = Res[Y == 1]
		CtN = len(CtRes); TrN = len(TrRes)
		STD = np.sqrt(((TrN - 1) * np.var(TrRes) + (CtN - 1) * np.var(CtRes)) / (CtN + TrN - 2))
		return (np.mean(TrRes) - np.mean(CtRes)) / STD

	def fit(self, X, Y, FullModelUpdate=1):
		"""Record scaling statistics, fit the classifier and optionally both GP regressors.

		X: covariates with the response in the last column; Y: 0/1 assignment.
		Raises ValueError if args.cls names no known classifier.
		"""
		Y = Y.reshape(-1)
		# Build the classifier first so a bad name fails before any state changes.
		Classifier = self._MakeClassifier(X)

		# NOTE(review): the statistics are refreshed on every call, but the
		# regressors are only refitted when FullModelUpdate == 1, so after a
		# partial update they see inputs scaled differently from their training
		# data - confirm this is intended.
		self.X_Mean = np.mean(X, axis=0)
		Std = np.std(X, axis=0)
		Std[Std == 0] = 1.0  # constant columns would otherwise divide by zero
		self.X_Std = Std
		X = self._Standardize(X)

		if FullModelUpdate == 1:
			X_Cov = X[:, :-1]; Y_R = X[:, -1]
			self.Cls0 = GaussianProcessRegressor(kernel=self.kernel, alpha=0.001).fit(
				X_Cov[Y == 0], Y_R[Y == 0].reshape((-1, 1)))
			self.Cls1 = GaussianProcessRegressor(kernel=self.kernel, alpha=0.001).fit(
				X_Cov[Y == 1], Y_R[Y == 1].reshape((-1, 1)))

		self.Cls = Classifier
		self.Cls.fit(self._ClassifierInput(X), Y)

	def predict_EffectSize(self, X):
		"""Return the (n, 1) point-wise effect size predicted by the two regressors."""
		X = self._Standardize(X)
		return self._PointwiseEffectSize(X[:, :-1])

	def AcquisitionFunc(self, X, Beta=1):
		"""Return the predicted mean difference Cls1 - Cls0 (Beta is currently unused)."""
		X_Cov = self._Standardize(X)[:, :-1]
		return self.Cls1.predict(X_Cov) - self.Cls0.predict(X_Cov)

	def predict_proba(self, X):
		"""Return the class probabilities of the fitted classifier Cls."""
		return self.Cls.predict_proba(self._ClassifierInput(self._Standardize(X)))

	def predict_proba2(self, X):
		"""Return (n, 2) arm probabilities from the Gaussian predictive densities of the response."""
		X = self._Standardize(X)
		Y_R = X[:, -1].reshape((-1, 1))
		Mean0, Std0, Mean1, Std1 = self._PredictArms(X[:, :-1])

		RegressionProb0 = np.exp(-0.5 * ((Y_R - Mean0) / Std0) ** 2) / (Std0 * np.sqrt(2 * np.pi))
		RegressionProb1 = np.exp(-0.5 * ((Y_R - Mean1) / Std1) ** 2) / (Std1 * np.sqrt(2 * np.pi))
		Total = RegressionProb0 + RegressionProb1

		Prob = np.zeros((len(X), 2))
		Prob[:, 0] = (RegressionProb0 / Total).reshape(-1)
		Prob[:, 1] = (RegressionProb1 / Total).reshape(-1)
		return Prob

	def predict_CohortEffectSize(self, X, Y):
		"""Return the observed pooled effect size of X's response, or nan if Y has one group."""
		X = self._Standardize(X)
		if len(np.unique(Y)) > 1:
			return self._PooledEffectSize(X[:, -1], Y)
		return np.nan

	def predict_CohortEffectSize2(self, X, Y):
		"""Return the observed effect size of a random sub-cohort selected by the model.

		Rows whose predicted effect size reaches args.Thres are candidates; at
		most args.GroupSize of them are drawn without replacement. Returns nan
		when the drawn cohort does not contain both groups.
		"""
		X = self._Standardize(X)
		Ind = self._CohortIndices(X)
		if len(Ind) > 0:
			Ind2 = Ind[np.random.choice(len(Ind), min(self.args.GroupSize, len(Ind)), replace=False)]
		else:
			Ind2 = Ind  # nothing selected; avoid sampling from an empty population
		print("The number of selected data: %d"%len(Ind2))

		Y_Cohort = Y[Ind2]; Res_Cohort = X[Ind2, -1]
		if len(np.unique(Y_Cohort)) > 1:
			return self._PooledEffectSize(Res_Cohort, Y_Cohort)
		return np.nan

	def predict_PredictionIndex(self, X, Y):
		"""Return indices of rows whose predicted effect size reaches args.Thres (Y is unused)."""
		return self._CohortIndices(self._Standardize(X))

class CausLR():
	"""Two-arm linear-regression outcome model plus an assignment classifier.

	Every X handled by this class stores the covariates in all but the last
	column and the response in the last column; Y is the 0/1 assignment.
	LR0 / LR1 regress the response on the covariates for Y == 0 / Y == 1 and
	Cls is a classifier selected by args.cls. The args object must provide cls
	and Trial (plus gr for the knn classifier and Thres for the cohort method).
	"""

	def __init__(self, args = None):
		self.LR0 = None  # linear regressor fitted on rows with Y == 0
		self.LR1 = None  # linear regressor fitted on rows with Y == 1
		self.Cls = None  # classifier chosen by args.cls
		self.args = args

	def _MakeClassifier(self, X):
		"""Create the unfitted classifier named by args.cls; ValueError if unknown."""
		Name = self.args.cls
		if Name == 'Causallogistic':
			return LogisticRegression(random_state=self.args.Trial, solver='lbfgs')
		if Name == 'CausalSVC':
			return SVC(gamma='auto', kernel='rbf', random_state=self.args.Trial, probability=True)
		if Name == 'Causalknn':
			return KNeighborsClassifier(algorithm='auto', n_neighbors=math.ceil(len(X) ** (self.args.gr)))
		if Name == 'CausalNN':
			return MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(X.shape[1], 4), random_state=1)
		if Name == 'CausalGPCls':
			# The kernel argument must be a kernel object, not the string 'rbf'.
			return GaussianProcessClassifier(kernel=1.0 * RBF(1.0), random_state=self.args.Trial)
		raise ValueError("Unknown classifier name in args.cls: %r" % (Name,))

	def _MeanDifference(self, X_Cov):
		"""Return LR1's prediction minus LR0's prediction for the covariates X_Cov."""
		return self.LR1.predict(X_Cov) - self.LR0.predict(X_Cov)

	@staticmethod
	def _PooledEffectSize(Res, Y):
		"""Return (mean(treated) - mean(control)) / pooled std for responses Res."""
		CtRes = Res[Y == 0]; TrRes = Res[Y == 1]
		CtN = len(CtRes); TrN = len(TrRes)
		STD = np.sqrt(((TrN - 1) * np.var(TrRes) + (CtN - 1) * np.var(CtRes)) / (CtN + TrN - 2))
		return (np.mean(TrRes) - np.mean(CtRes)) / STD

	def fit(self, X, Y, FullModelUpdate=1):
		"""Fit the classifier and, when FullModelUpdate == 1, both linear regressors.

		X: covariates with the response in the last column; Y: 0/1 assignment.
		Raises ValueError if args.cls names no known classifier.
		"""
		Y = Y.reshape(-1)
		# Build the classifier first so a bad name fails before any fitting.
		Classifier = self._MakeClassifier(X)

		if FullModelUpdate == 1:
			X_Cov = X[:, :-1]; Y_R = X[:, -1]
			self.LR0 = LinearRegression().fit(X_Cov[Y == 0], Y_R[Y == 0].reshape((-1, 1)))
			self.LR1 = LinearRegression().fit(X_Cov[Y == 1], Y_R[Y == 1].reshape((-1, 1)))

		self.Cls = Classifier
		self.Cls.fit(X, Y)

	def AcquisitionFunc(self, X, Beta=1):
		"""Return the predicted mean difference LR1 - LR0 (Beta is currently unused)."""
		return self._MeanDifference(X[:, :-1])

	def predict_proba(self, X):
		"""Return the class probabilities of the fitted classifier Cls."""
		return self.Cls.predict_proba(X)

	def predict_proba2(self, X):
		"""Return (n, 2) arm probabilities from unit-variance Gaussian densities of the response."""
		X_Cov = X[:, :-1]; Y_R = X[:, -1].reshape((-1, 1))
		PY_Mean_0 = np.reshape(self.LR0.predict(X_Cov), (-1, 1))
		PY_Mean_1 = np.reshape(self.LR1.predict(X_Cov), (-1, 1))

		Const = np.sqrt(2 * np.pi)
		RegressionProb0 = np.exp(-0.5 * (Y_R - PY_Mean_0) ** 2) / Const
		RegressionProb1 = np.exp(-0.5 * (Y_R - PY_Mean_1) ** 2) / Const
		Total = RegressionProb0 + RegressionProb1

		Prob = np.zeros((len(X), 2))
		Prob[:, 0] = (RegressionProb0 / Total).reshape(-1)
		Prob[:, 1] = (RegressionProb1 / Total).reshape(-1)
		return Prob

	def predict_EffectSize(self, X):
		"""Return the predicted mean difference LR1 - LR0 for each row."""
		return self._MeanDifference(X[:, :-1])

	def predict_AvgEffectSize(self, X, Y):
		"""Return the observed pooled effect size of X's response over all rows."""
		return self._PooledEffectSize(X[:, -1], Y)

	def predict_CohortEffectSize(self, X, Y):
		"""Return the observed effect size of the rows selected by the model.

		A row is selected when its predicted mean difference divided by
		sqrt(2) reaches args.Thres. Returns nan when the selected rows do not
		contain both groups.
		"""
		Score = self._MeanDifference(X[:, :-1]) / np.sqrt(2)
		CohortMask = (Score >= self.args.Thres).reshape(-1)

		Y_Cohort = Y[CohortMask]; Res_Cohort = X[CohortMask, -1]
		if len(np.unique(Y_Cohort)) > 1:
			return self._PooledEffectSize(Res_Cohort, Y_Cohort)
		return np.nan

class ScaledCausLR():
	"""Standardising variant of the two-arm linear-regression outcome model.

	Every X handled by this class stores the covariates in all but the last
	column and the response in the last column; Y is the 0/1 assignment.
	fit() records the column means and standard deviations of X and every
	method standardises its X with them before use. LR0 / LR1 regress the
	response on the covariates for Y == 0 / Y == 1 and Cls is a classifier
	selected by args.cls. The args object must provide cls and Trial (plus gr
	for the knn classifier and Thres for the cohort method).
	"""

	def __init__(self, args = None):
		self.LR0 = None  # linear regressor fitted on rows with Y == 0
		self.LR1 = None  # linear regressor fitted on rows with Y == 1
		self.Cls = None  # classifier chosen by args.cls
		self.args = args
		self.X_Mean = None  # column means recorded by fit()
		self.X_Std = None  # column standard deviations recorded by fit()

	def _Standardize(self, X):
		"""Return X centred and scaled with the statistics recorded by fit()."""
		return (X - self.X_Mean) / self.X_Std

	def _MakeClassifier(self, X):
		"""Create the unfitted classifier named by args.cls; ValueError if unknown."""
		Name = self.args.cls
		if Name in ('Causallogistic', 'ScaledCausallogistic'):
			return LogisticRegression(random_state=self.args.Trial, solver='lbfgs')
		if Name in ('CausalSVC', 'ScaledCausalSVC'):
			return SVC(gamma='auto', kernel='rbf', random_state=self.args.Trial, probability=True)
		if Name == 'Causalknn':
			return KNeighborsClassifier(algorithm='auto', n_neighbors=math.ceil(len(X) ** (self.args.gr)))
		if Name == 'CausalNN':
			return MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(X.shape[1], 4), random_state=1)
		if Name == 'CausalGPCls':
			# The kernel argument must be a kernel object, not the string 'rbf'.
			return GaussianProcessClassifier(kernel=1.0 * RBF(1.0), random_state=self.args.Trial)
		raise ValueError("Unknown classifier name in args.cls: %r" % (Name,))

	def _MeanDifference(self, X_Cov):
		"""Return LR1's prediction minus LR0's prediction for the covariates X_Cov."""
		return self.LR1.predict(X_Cov) - self.LR0.predict(X_Cov)

	@staticmethod
	def _PooledEffectSize(Res, Y):
		"""Return (mean(treated) - mean(control)) / pooled std for responses Res."""
		CtRes = Res[Y == 0]; TrRes = Res[Y == 1]
		CtN = len(CtRes); TrN = len(TrRes)
		STD = np.sqrt(((TrN - 1) * np.var(TrRes) + (CtN - 1) * np.var(CtRes)) / (CtN + TrN - 2))
		return (np.mean(TrRes) - np.mean(CtRes)) / STD

	def fit(self, X, Y, FullModelUpdate=1):
		"""Record scaling statistics, fit the classifier and optionally both linear regressors.

		X: covariates with the response in the last column; Y: 0/1 assignment.
		Raises ValueError if args.cls names no known classifier.
		"""
		Y = Y.reshape(-1)
		# Build the classifier first so a bad name fails before any state changes.
		Classifier = self._MakeClassifier(X)

		# NOTE(review): the statistics are refreshed on every call, but the
		# regressors are only refitted when FullModelUpdate == 1, so after a
		# partial update they see inputs scaled differently from their training
		# data - confirm this is intended.
		self.X_Mean = np.mean(X, axis=0)
		Std = np.std(X, axis=0)
		Std[Std == 0] = 1.0  # constant columns would otherwise divide by zero
		self.X_Std = Std
		X = self._Standardize(X)

		if FullModelUpdate == 1:
			X_Cov = X[:, :-1]; Y_R = X[:, -1]
			self.LR0 = LinearRegression().fit(X_Cov[Y == 0], Y_R[Y == 0].reshape((-1, 1)))
			self.LR1 = LinearRegression().fit(X_Cov[Y == 1], Y_R[Y == 1].reshape((-1, 1)))

		self.Cls = Classifier
		self.Cls.fit(X, Y)

	def AcquisitionFunc(self, X, Beta=1):
		"""Return the predicted mean difference LR1 - LR0 (Beta is currently unused)."""
		return self._MeanDifference(self._Standardize(X)[:, :-1])

	def predict_proba(self, X):
		"""Return the class probabilities of the fitted classifier Cls."""
		return self.Cls.predict_proba(self._Standardize(X))

	def predict_proba2(self, X):
		"""Return (n, 2) arm probabilities from unit-variance Gaussian densities of the response."""
		X = self._Standardize(X)
		X_Cov = X[:, :-1]; Y_R = X[:, -1].reshape((-1, 1))
		PY_Mean_0 = np.reshape(self.LR0.predict(X_Cov), (-1, 1))
		PY_Mean_1 = np.reshape(self.LR1.predict(X_Cov), (-1, 1))

		Const = np.sqrt(2 * np.pi)
		RegressionProb0 = np.exp(-0.5 * (Y_R - PY_Mean_0) ** 2) / Const
		RegressionProb1 = np.exp(-0.5 * (Y_R - PY_Mean_1) ** 2) / Const
		Total = RegressionProb0 + RegressionProb1

		Prob = np.zeros((len(X), 2))
		Prob[:, 0] = (RegressionProb0 / Total).reshape(-1)
		Prob[:, 1] = (RegressionProb1 / Total).reshape(-1)
		return Prob

	def predict_EffectSize(self, X):
		"""Return the predicted mean difference LR1 - LR0 for each row."""
		return self._MeanDifference(self._Standardize(X)[:, :-1])

	def predict_AvgEffectSize(self, X, Y):
		"""Return the observed pooled effect size of X's standardised response over all rows."""
		return self._PooledEffectSize(self._Standardize(X)[:, -1], Y)

	def predict_CohortEffectSize(self, X, Y):
		"""Return the observed effect size of the rows selected by the model.

		A row is selected when its predicted mean difference divided by
		sqrt(2) reaches args.Thres. Returns nan when the selected rows do not
		contain both groups.
		"""
		X = self._Standardize(X)
		Score = self._MeanDifference(X[:, :-1]) / np.sqrt(2)
		CohortMask = (Score >= self.args.Thres).reshape(-1)

		Y_Cohort = Y[CohortMask]; Res_Cohort = X[CohortMask, -1]
		if len(np.unique(Y_Cohort)) > 1:
			return self._PooledEffectSize(Res_Cohort, Y_Cohort)
		return np.nan